import pandas as pd
import random

# Load the full dataset from the source CSV file.
data = pd.read_csv('filtered_data_2015_300w.csv')

# Row masks separating the two classes by their vt_detection value.
is_malware = data['vt_detection'] == 1
is_benign = data['vt_detection'] == 0

# Draw 5000 rows from each class. The fixed random_state makes the selection
# repeatable for the same input; sample() raises ValueError if a class has
# fewer than 5000 rows, in which case nothing is written below.
malware_data = data.loc[is_malware].sample(n=5000, random_state=1)
benign_data = data.loc[is_benign].sample(n=5000, random_state=1)

# Write each sample to its own CSV file, omitting the DataFrame index column.
for frame, path in ((malware_data, 'malware.csv'), (benign_data, 'benign.csv')):
    frame.to_csv(path, index=False)